import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer
from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_val_predict, GridSearchCV, RandomizedSearchCV
from sklearn import svm, metrics, tree
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import RFE
import plotly.express as px
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline, make_pipeline
import scikitplot as skplt
From the data documentation:
Attribute Information:
LB - FHR baseline (beats per minute)
AC - # of accelerations per second
FM - # of fetal movements per second
UC - # of uterine contractions per second
DL - # of light decelerations per second
DS - # of severe decelerations per second
DP - # of prolongued decelerations per second
ASTV - percentage of time with abnormal short term variability
MSTV - mean value of short term variability
ALTV - percentage of time with abnormal long term variability
MLTV - mean value of long term variability
Width - width of FHR histogram
Min - minimum of FHR histogram
Max - Maximum of FHR histogram
Nmax - # of histogram peaks
Nzeros - # of histogram zeros
Mode - histogram mode
Mean - histogram mean
Median - histogram median
Variance - histogram variance
Tendency - histogram tendency
A - calm sleep
B - REM sleep
C - calm vigilance
D - active vigilance
SH - shift pattern (A or Susp with shifts)
AD - accelerative/decelerative pattern (stress situation)
DE - decelerative pattern (vagal stimulation)
LD - largely decelerative pattern
FS - flat-sinusoidal pattern (pathological state)
SUSP - suspect pattern
CLASS - FHR pattern class code (1 to 10) for class A to SUSP
NSP - fetal state class code (1=normal; 2=suspect; 3=pathologic)
# Load the raw CTG spreadsheet; the real column names live in header row 1.
ctg_data = pd.read_excel('data/CTG.xls', sheet_name='Data', header=1)

# Quick look at the head, tail, and column dtypes of the raw data.
ctg_data.head()
ctg_data.tail()
ctg_data.info()
The format of the data left some columns entirely null, and there are some trailing rows that are incomplete; both will be dropped.
# Drop columns that are entirely null (artifacts of the spreadsheet layout).
ctg_data.dropna(axis=1, how='all', inplace=True)
# Drop the incomplete trailing rows.
ctg_data.dropna(axis=0, inplace=True)
# Sanity check: True would mean null values remain.
# fix: the original used .any().all(), which is True only when EVERY column
# still contains a null; .any().any() correctly flags ANY remaining null.
ctg_data.isnull().any().any()
There is a set of 6 features that appear twice in the data, they are explored here.
# Summary statistics for each set of duplicated ("sister") features.
ctg_data[['AC', 'FM', 'UC', 'DL', 'DS', 'DP']].describe()
ctg_data[['AC.1', 'FM.1', 'UC.1', 'DL.1', 'DS.1', 'DP.1']].describe()

# Correlation heatmap to check whether the sister columns track each other.
sister_cols = pd.concat(
    [ctg_data.loc[:, 'AC':'DP'], ctg_data.loc[:, 'AC.1':'DP.1']],
    axis=1,
)
plt.figure(figsize=(20, 10))
sns.heatmap(sister_cols.corr(), annot=True)
plt.show()

# 'DR' is a column of zeros.
ctg_data['DR'].describe()
The sister columns are highly correlated; I am pretty sure the features labeled '.1' are transformed versions of the sister columns. What I believe is untransformed data will be dropped. Features 'b' and 'e' will also be dropped because they are irrelevant time data referring to when the cardiotocograph was measured. Finally, 'DR' will be dropped since it is a column of zeros and will provide no predictive power.
# Remove the untransformed sister columns, the measurement-time features
# ('b' and 'e'), and the all-zero 'DR' column.
cols_to_drop = ['b', 'e', 'AC', 'FM', 'UC', 'DL', 'DS', 'DP', 'DR']
ctg_data.drop(columns=cols_to_drop, inplace=True)
The feature 'Tendency', the targets 'CLASS' and 'NSP', and the class attribute variables 'A', 'B', 'C', 'D', 'E', 'AD', 'DE', 'LD', 'FS', and 'SUSP' all need to be categorical data.
#change data to category
# NOTE(review): assigning through .loc writes the values back into the
# existing columns; whether the stored dtype actually becomes 'category'
# depends on the pandas version -- confirm with ctg_data.dtypes. The
# correlation heatmap later in this file relies on these columns still
# behaving numerically.
ctg_data.loc[:,'Tendency':'NSP'] = ctg_data.loc[:,'Tendency':'NSP'].astype('category')
#drop duplicate rows, reset index so row labels stay contiguous
ctg_data = ctg_data.drop_duplicates().reset_index(drop=True)
#view of data after cleaning
ctg_data.info()
ctg_data.shape
#summary statistics for the numeric FHR features
ctg_data.loc[:,'LB':'MLTV'].describe()
#summary statistics for the FHR histogram features
ctg_data.loc[:,'Width':'Variance'].describe()
#plot box plots for each feature, boxplots of each class inside of each feature, and boxplots of NSP inside of each feature
for i in ctg_data.columns[0:20]:
fig, (ax1, ax2, ax3) = plt.subplots(1,3,figsize=(15,5))
sns.boxplot(ax=ax1, y=ctg_data[i], data=ctg_data)
sns.boxplot(ax=ax2, x='CLASS', y=ctg_data[i], data=ctg_data)
sns.boxplot(ax=ax3, x='NSP', y=ctg_data[i], data=ctg_data)
plt.show()
plt.close()
The data has a pretty variable range, with the smallest feature max equal to .001353 and the largest feature max equal to 160. Many features have outliers, and some have numerous outliers. I suspect these outliers will be really important in classification.
# Scatter matrices for a general view of the data. Notice the CLASS graphs,
# and how separated the fetal states are.
fhr_dims = ['LB', 'AC.1', 'FM.1', 'UC.1', 'DL.1', 'DS.1', 'DP.1', 'CLASS']
fig = px.scatter_matrix(ctg_data, dimensions=fhr_dims, color="NSP")
fig.show()

variability_dims = ['ASTV', 'MSTV', 'ALTV', 'MLTV', 'CLASS']
fig = px.scatter_matrix(ctg_data, dimensions=variability_dims, color="NSP")
fig.show()

histogram_dims = ['Width', 'Min', 'Max', 'Nmax', 'Nzeros', 'Mode', 'Mean',
                  'Median', 'Variance', 'Tendency', 'CLASS']
fig = px.scatter_matrix(ctg_data, dimensions=histogram_dims, color="NSP")
fig.show()
# Full feature-correlation heatmap.
plt.figure(figsize=(20, 20))
sns.heatmap(ctg_data.corr(), cmap='coolwarm', annot=True)
plt.show()
There are some pretty clear separations in the data. There are also some pretty strong correlations in the data. A big takeaway is that NSP is highly segmented by CLASS, meaning CLASS, itself a target, will heavily influence the predictive power of any model. Due to the high correlation in the data, feature elimination will be run in the future.
# Data spread across class codes.
df1 = ctg_data.groupby(['CLASS'])['LB'].count()
df1
# Data spread across pathological states.
df2 = ctg_data.groupby(['NSP'])['LB'].count()
df2
# Data spread across both pathological states and class codes.
df3 = ctg_data.groupby(['NSP', 'CLASS'])['LB'].count()
df3.dropna(inplace=True)
df3
The data is fairly imbalanced, with approximately 22% of our data being either 2 (suspect) or 3 (pathologic) for pathologic state. The data is also imbalanced in regards to class, with some classes having nearly 12 times as many data points as others.
First, models predicting NSP will be generated and trained as a baseline; then a model predicting CLASS will be generated and trained. These models will be combined into a chain classifier. These baseline models will then be compared to models trained on data that has had the target imbalance fixed using oversampling of the minority classes and undersampling of the majority class/classes, depending on the target.
# Split the data; the one-hot class columns are cast back to int for modeling.
ctg_data.loc[:, 'Tendency':'SUSP'] = ctg_data.loc[:, 'Tendency':'SUSP'].astype('int')
X_train, X_test, y_train, y_test = train_test_split(
    ctg_data.loc[:, 'LB':'Tendency'], ctg_data[['CLASS', 'NSP']],
    test_size=0.3, random_state=13, stratify=ctg_data['NSP'])
# Separate the two targets; reset indexes so they align with scaled arrays.
y_train_class = y_train['CLASS'].reset_index(drop=True)
y_test_class = y_test['CLASS'].reset_index(drop=True)
y_train_nsp = y_train['NSP'].reset_index(drop=True)
y_test_nsp = y_test['NSP'].reset_index(drop=True)

# Scale the data; RobustScaler is used because of the many outliers.
rob_scale = RobustScaler()
# fix: the scaler is unsupervised -- the y argument the original passed to
# fit_transform was ignored, so it is dropped for clarity.
X_train_scale = rob_scale.fit_transform(X_train)
X_test_scale = rob_scale.transform(X_test)
# NSP baseline models.
models = [('Support Vector Machine', svm.SVC(kernel='linear', probability=True,
                                             decision_function_shape='ovr', break_ties=True)),
          ('Logistic Regression', LogisticRegression(solver='newton-cg', n_jobs=-1)),
          ('K-Nearest Neighbors', KNeighborsClassifier(weights='distance')),
          ('Decision Tree', tree.DecisionTreeClassifier(random_state=13)),
          ('Random Forest', RandomForestClassifier(random_state=13, n_estimators=100))]

# Fit and evaluate each model on the NSP target.
recall = []
classifiers = []
for clf, model in models:
    classifiers.append(clf)
    # fix: removed the no-op `model = model` statement and the redundant
    # in-loop `import scikitplot as skplt` (already imported at the top
    # of the file).
    model.fit(X_train_scale, y_train_nsp)
    y_pred = model.predict(X_test_scale)
    print(f'NSP models\n{clf}:')
    print(classification_report(y_test_nsp, y_pred))
    print(f'confusion matrix \n{confusion_matrix(y_test_nsp, y_pred)}\n')
    recall.append(metrics.recall_score(y_test_nsp, y_pred, average='macro', zero_division=1))
    predicted_probas = model.predict_proba(X_test_scale)
    skplt.metrics.plot_roc(y_test_nsp, predicted_probas, figsize=(8, 8))
    plt.show()
    plt.close()

# DataFrame for easy viewing.
model_select = pd.DataFrame({'nsp_recall': recall}, index=classifiers)
display(model_select)
Both Decision trees and random forests predict fetal states with good recall. Random Forests have a stronger ROC curve.
# CLASS baseline models.
models = [('Support Vector Machine', svm.SVC(kernel='linear', probability=True,
                                             decision_function_shape='ovr', break_ties=True)),
          ('Logistic Regression', LogisticRegression(solver='newton-cg', n_jobs=-1)),
          ('K-Nearest Neighbors', KNeighborsClassifier(weights='distance')),
          ('Decision Tree', tree.DecisionTreeClassifier(random_state=13)),
          ('Random Forest', RandomForestClassifier(random_state=13, n_estimators=100))]

# Fit and evaluate each model on the CLASS target.
recall = []
for clf, model in models:
    # fix: removed the no-op `model = model` statement.
    model.fit(X_train_scale, y_train_class)
    y_pred = model.predict(X_test_scale)
    print(f'Class models\n{clf}:')
    print(classification_report(y_test_class, y_pred))
    print(f'confusion matrix \n{confusion_matrix(y_test_class, y_pred)}\n')
    recall.append(metrics.recall_score(y_test_class, y_pred, average='macro', zero_division=1))
    predicted_probas = model.predict_proba(X_test_scale)
    skplt.metrics.plot_roc(y_test_class, predicted_probas, figsize=(10, 10))
    plt.show()
    plt.close()

# DataFrame for easy viewing.
model_select['class_recall'] = recall
display(model_select)
Unfortunately none of the models predicted fetal heart rate patterns with great recall, but the random forest classifier does the best job with ~77% recall.
Predict the values of CLASS in the test data set, then train models to predict NSP on data that includes actual CLASS values.
# Random forest classifier (the best CLASS model from above).
ctg_rfc1 = RandomForestClassifier(random_state=13, n_estimators=100)
ctg_rfc1.fit(X_train_scale, y_train_class)
y_pred = ctg_rfc1.predict(X_test_scale)

# Build DataFrames for the chain classifier: the second model trains on the
# actual CLASS values, but is tested on CLASS values predicted by ctg_rfc1.
col = X_train.columns
dummy_col = ctg_data.loc[:, 'LB':"SUSP"].columns
X_train2_scale = pd.DataFrame(X_train_scale, columns=col)
X_test2_scale = pd.DataFrame(X_test_scale, columns=col)
train_class = y_train_class
test_class = y_pred

# One-hot encode CLASS (0 = no, 1 = yes).
class_names = {1.0: 'A', 2.0: 'B', 3.0: 'C', 4.0: 'D', 5.0: 'E',
               6.0: 'AD', 7.0: 'DE', 8.0: 'LD', 9.0: 'FS', 10.0: 'SUSP'}
train_dummies = pd.get_dummies(train_class).rename(columns=class_names)
test_dummies = pd.get_dummies(test_class).rename(columns=class_names)
# fix: get_dummies omits any class the model never predicted, which would
# make the dummy_col selection below raise a KeyError -- reindex guarantees
# every class column exists (missing ones filled with 0).
train_dummies = train_dummies.reindex(columns=list(class_names.values()), fill_value=0)
test_dummies = test_dummies.reindex(columns=list(class_names.values()), fill_value=0)

X_train2_scale = pd.concat([X_train2_scale, train_dummies], axis=1)[dummy_col]
X_test2_scale = pd.concat([X_test2_scale, test_dummies], axis=1)[dummy_col]
# Chain classifier: evaluate each model as the second classifier, fed the
# one-hot CLASS columns (actual values in train, predicted values in test).
models = [('Support Vector Machine', svm.SVC(kernel='linear', probability=True,
                                             decision_function_shape='ovr', break_ties=True)),
          ('Logistic Regression', LogisticRegression(solver='newton-cg', n_jobs=-1)),
          ('K-Nearest Neighbors', KNeighborsClassifier(weights='distance')),
          ('Decision Tree', tree.DecisionTreeClassifier(random_state=13)),
          ('Random Forest', RandomForestClassifier(random_state=13, n_estimators=100))]

recall = []
classifiers = []  # NOTE(review): never appended to in this loop; kept to preserve state.
for clf, model in models:
    # fix: removed the no-op `model = model` statement.
    model.fit(X_train2_scale, y_train_nsp)
    y_pred = model.predict(X_test2_scale)
    print(clf + ':')
    print(classification_report(y_test_nsp, y_pred))
    print(f'confusion matrix \n{confusion_matrix(y_test_nsp, y_pred)}\n')
    recall.append(metrics.recall_score(y_test_nsp, y_pred, average='macro', zero_division=1))
    predicted_probas = model.predict_proba(X_test2_scale)
    skplt.metrics.plot_roc(y_test_nsp, predicted_probas, figsize=(10, 10))
    plt.show()
    plt.close()

# DataFrame for easy viewing.
model_select['chain_recall'] = recall
display(model_select)
The best second classifiers in the chain are SVM's, Logistic Regression, and Random Forests. These three models each had ~87% recall, and strong ROC AUC scores. I hope to improve this accuracy further by oversampling and undersampling the data to improve target imbalances, and feature selection.
To improve model recall, I attempt to use oversampling and undersampling, cross validation, feature elimination and hyperparameter tuning.
# Target distribution in the training set.
X = pd.concat([X_train, y_train], axis=1)
display(X.groupby('NSP').count()['LB'])
display(X.groupby('CLASS').count()['LB'])

# Over/under samplers (applied per fold inside the pipeline below).
oversample = SMOTE(sampling_strategy={3.0: 200, 4.0: 200, 5.0: 200, 7.0: 200,
                                      8.0: 200, 9.0: 200, 10.0: 200}, random_state=13)
undersample = RandomUnderSampler(sampling_strategy={1.0: 200, 2.0: 200}, random_state=13)

# Cross-validate the random forest CLASS model, applying SMOTE on each fold.
kf = KFold(n_splits=10, shuffle=True, random_state=13)
cv = []
stds = []
indx = []
indx.append('k folds class mean recall')

# Feature selection lives inside the pipeline so it is fit per fold.
rf = RandomForestClassifier(random_state=13)
rfe = RFE(rf, n_features_to_select=15)
pipe = Pipeline([('rscale', rob_scale), ('os', oversample), ('us', undersample), ('rfe', rfe),
                 ('rf', RandomForestClassifier(random_state=13, n_estimators=100))])
score = cross_val_score(pipe, X_train, y_train_class, scoring='recall_macro', cv=kf)
cv.append(score.mean())
stds.append(score.std())
# fix: the original printed the loop variable `clf`, which held
# 'Random Forest' only as leftover state from the previous loop.
print('Random Forest:')
print(f'k folds (k=10) mean recall: {score.mean()}')
y_pred = cross_val_predict(pipe, X_train, y_train_class, cv=kf)
print(f'confusion matrix \n{confusion_matrix(y_train_class, y_pred)}\n')
The random forest model improves its recall to ~84% when oversampling, undersampling and feature elimination are applied. This result is generated from k-folds cross validation with k equal to 10. The cross validation is performed on the training data so that a portion of the data remains completely unseen by the model. Hyperparameter tuning will be applied to the Random Forest Classifier and then the resulting model is tested on the unseen test data; the test recall will be compared to the cross validation recall to measure validity.
# Hyperparameter tuning for the CLASS random forest.
pipe = Pipeline([('rscale', rob_scale), ('os', oversample), ('us', undersample), ('rfe', rfe),
                 ('rf', RandomForestClassifier(random_state=13))])
params = {
    'rf__bootstrap': [True, False],
    'rf__max_depth': [50, 60, 70, 80, 90],
    'rf__max_features': [None, 'sqrt', 'log2'],
    'rf__min_samples_leaf': [3, 4, 5],
    'rf__min_samples_split': [2, 3, 4, 5],
    'rf__n_estimators': [int(x) for x in np.linspace(start=100, stop=500, num=5)]}
grid_search = GridSearchCV(estimator=pipe, param_grid=params,
                           scoring='recall_macro', refit=True, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train_class)
display(grid_search.best_params_)
# fix: with refit=True, best_estimator_ is already fitted on the full
# training data -- the original's extra best_grid.fit(...) repeated that
# work for no change in result (all random states are fixed).
best_grid = grid_search.best_estimator_
class_pred = best_grid.predict(X_test)
print(f'Random Forest:\nClass\n{metrics.classification_report(y_test_class, class_pred)}\n')
print(f'confusion matrix: \n {confusion_matrix(y_test_class, class_pred)}')
indx.append('test data class recall')
cv.append(metrics.recall_score(y_test_class, class_pred, average='macro', zero_division=1))
stds.append('NaN')
predicted_probas = best_grid.predict_proba(X_test)
skplt.metrics.plot_roc(y_test_class, predicted_probas, figsize=(10, 10))
plt.show()
plt.close()
The Random Forest model proved to be the best at predicting fetal heart rate patterns (CLASS); after tuning, the macro recall on the test data was improved around 5%, from ~77% to ~82%. This is important because any error in predicting fetal heart rate pattern will compound in the chain classifier prediction of fetal state.
The first model in my chain classifier is now tuned. Using this model, class values are predicted for the test data. All five models will now be tested as the second classifier in the chain. These models will be cross-validated using K-Folds cross validation. They will only be cross-validated on the training data, so that a portion of the data remains completely unseen by the models. The models are then tested on the unseen test data, and the test recall value is compared to the recall of the cross-validated model to measure validity.
# Change the sampling targets for NSP.
oversample = SMOTE(sampling_strategy={2.0: 400, 3.0: 400}, random_state=13)
undersample = RandomUnderSampler(sampling_strategy={1: 800}, random_state=13)

# Build the chain-classifier frames: predicted CLASS (from the tuned model)
# for the test set, actual CLASS for the training set.
col = X_train.columns
dummy_col = ctg_data.loc[:, 'LB':"SUSP"].columns
test_class = class_pred
train_class = y_train_class

# One-hot encode CLASS (0 = no, 1 = yes).
class_names = {1.0: 'A', 2.0: 'B', 3.0: 'C', 4.0: 'D', 5.0: 'E',
               6.0: 'AD', 7.0: 'DE', 8.0: 'LD', 9.0: 'FS', 10.0: 'SUSP'}
test_dummies = pd.get_dummies(test_class).rename(columns=class_names)
train_dummies = pd.get_dummies(train_class).rename(columns=class_names)
# fix: reindex so a class the model never predicted still gets a (zero)
# column; otherwise the dummy_col selection below raises a KeyError.
test_dummies = test_dummies.reindex(columns=list(class_names.values()), fill_value=0)
train_dummies = train_dummies.reindex(columns=list(class_names.values()), fill_value=0)

X_test3 = pd.concat([X_test.reset_index(drop=True), test_dummies], axis=1)[dummy_col]
X_train3 = pd.concat([X_train.reset_index(drop=True), train_dummies], axis=1)[dummy_col]
# Cross-validate every candidate second classifier in the chain on the
# training data only (actual CLASS values), so the test set stays unseen.
kf = KFold(n_splits=10, shuffle=True, random_state=13)
models = [('Support Vector Machine', svm.SVC(kernel='linear')),
          ('Logistic Regression', LogisticRegression(solver='newton-cg', n_jobs=-1)),
          ('K-Nearest Neighbors', KNeighborsClassifier(weights='distance')),
          ('Decision Tree', tree.DecisionTreeClassifier(random_state=13)),
          ('Random Forest', RandomForestClassifier(random_state=13, n_estimators=100))]

mean = []
std = []
classifier = []
for clf, model in models:
    classifier.append(clf)
    pipe = Pipeline([('rscale', rob_scale), ('os', oversample),
                     ('us', undersample), ('rfe', rfe), ('clf', model)])
    score = cross_val_score(pipe, X_train3, y_train_nsp,
                            scoring='recall_macro', cv=kf, n_jobs=-1)
    mean.append(score.mean())
    std.append(score.std())
    print(f'{clf}:')
    print(f'recall: {score.mean()}')
    y_pred_smote = cross_val_predict(pipe, X_train3, y_train_nsp, cv=kf, n_jobs=-1)
    print(f'confusion matrix \n{confusion_matrix(y_train_nsp, y_pred_smote)}\n')

# DataFrame for easy viewing.
smote_chain_cv = pd.DataFrame({'CV Recall Mean': mean, 'Std': std}, index=classifier)
display(smote_chain_cv)
Those are good scores, unfortunately these models have not seen the predicted fetal heart rate pattern data, only the true fetal heart rate pattern data. Since all five models scored really well I will test all five as the final part of my chain classifier.
# Chain classifier, all models: fit on the training frame with actual CLASS,
# test on the frame whose CLASS columns were predicted by the tuned model.
models = [('Support Vector Machine', svm.SVC(kernel='linear', probability=True,
                                             decision_function_shape='ovr', break_ties=True)),
          ('Logistic Regression', LogisticRegression(solver='newton-cg', n_jobs=-1)),
          ('K-Nearest Neighbors', KNeighborsClassifier(weights='distance')),
          ('Decision Tree', tree.DecisionTreeClassifier(random_state=13)),
          ('Random Forest', RandomForestClassifier(random_state=13, n_estimators=100))]

recall = []
for clf, model in models:
    pipe = Pipeline([('rscale', rob_scale), ('os', oversample),
                     ('us', undersample), ('rfe', rfe), ('clf', model)])
    pipe.fit(X_train3, y_train_nsp)
    chain_pred = pipe.predict(X_test3)
    print(f'{clf}:')
    print(classification_report(y_test_nsp, chain_pred))
    print(f'confusion matrix \n{confusion_matrix(y_test_nsp, chain_pred)}\n')
    recall.append(metrics.recall_score(y_test_nsp, chain_pred, average='macro', zero_division=1))
    predicted_probas = pipe.predict_proba(X_test3)
    skplt.metrics.plot_roc(y_test_nsp, predicted_probas, figsize=(10, 10))
    plt.show()
    plt.close()

# DataFrame for easy viewing.
smote_chain_cv['chain_recall'] = recall
display(smote_chain_cv)
Four models have ~90% recall. Since Random Forest had the best recall when used as the final classifier in the baseline chain classifier, I will attempt to tune this model further.
# Cross-validate the random forest chain model on the training data.
kf = KFold(n_splits=10, shuffle=True, random_state=13)
indx.append('k folds chain mean recall')
# Feature selection inside the pipeline.
rf = RandomForestClassifier(random_state=13)
rfe = RFE(rf, n_features_to_select=15)
pipe = Pipeline([('rscale', rob_scale), ('os', oversample), ('us', undersample), ('rfe', rfe),
                 ('rf', RandomForestClassifier(random_state=13, n_estimators=100))])
# Cross validation.
score = cross_val_score(pipe, X_train3, y_train_nsp, scoring='recall_macro', cv=kf)
cv.append(score.mean())
stds.append(score.std())
# fix: the original printed the stale loop variable `clf`, which held
# 'Random Forest' only as leftover state from the previous loop.
print('Random Forest:')
print(f'k folds (k=10) mean recall: {score.mean()}')
chain_pred = cross_val_predict(pipe, X_train3, y_train_nsp, cv=kf)
print(f'confusion matrix \n{confusion_matrix(y_train_nsp, chain_pred)}\n')
The random forest model can predict fetal state with ~97% recall when provided actual fetal heart rate pattern values.
Hyperparameter tuning and feature elimination are performed to try to get the fetal state prediction recall that high when the model is used to predict the test data.
# Feature selection inside the pipeline.
rf = RandomForestClassifier(random_state=13)
rfe = RFE(rf, n_features_to_select=15)
pipe = Pipeline([('rscale', rob_scale), ('os', oversample), ('us', undersample), ('rfe', rfe),
                 ('rf', RandomForestClassifier(random_state=13))])
# Hyperparameter tuning for the NSP random forest.
params = {
    'rf__bootstrap': [True, False],
    'rf__max_depth': [50, 60, 70, 80, 90],
    'rf__max_features': [None, 'sqrt', 'log2'],
    'rf__min_samples_leaf': [3, 4, 5],
    'rf__min_samples_split': [2, 3, 4, 5],
    'rf__n_estimators': [int(x) for x in np.linspace(start=100, stop=500, num=5)]}
grid_search = GridSearchCV(estimator=pipe, param_grid=params,
                           scoring='recall_macro', refit=True, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train3, y_train_nsp)
display(grid_search.best_params_)
# fix: with refit=True, best_estimator_ is already fitted on the full
# training data, so the original's extra best_grid.fit(...) only repeated
# that work (all random states are fixed, so the result is unchanged).
best_grid = grid_search.best_estimator_
# The best hyperparameter model predicts NSP on the unseen test data.
chain_pred = best_grid.predict(X_test3)
print(f'Random Forest:\nClass\n{metrics.classification_report(y_test_nsp, chain_pred)}\n')
print(f'confusion matrix: \n {confusion_matrix(y_test_nsp, chain_pred)}')
indx.append('test data nsp chain recall')
cv.append(metrics.recall_score(y_test_nsp, chain_pred, average='macro', zero_division=1))
stds.append('NaN')
predicted_probas = best_grid.predict_proba(X_test3)
skplt.metrics.plot_roc(y_test_nsp, predicted_probas, figsize=(10, 10))
plt.show()
plt.close()
# Summary table: cross-validated vs. test recall for both steps of the chain.
chain_steps = ['first classifier cross validation', 'first classifier cross validation',
               'second classifier cross validation', 'second classifier cross validation']
final_df = pd.DataFrame({'recall': cv, 'std': stds}, index=indx)
final_df.reset_index(inplace=True)
final_df['from'] = chain_steps
final_df.set_index(['from', 'index'], inplace=True)
final_df = final_df.rename_axis(index={'from': 'Step in Chain Classifier', 'index': ' '})
final_df
The test data class recall is very close to the cross-validated mean recall, so this result is most likely valid. The test data NSP recall was not that close to the cross-validated NSP recall of the second classifier in the chain classifier. This is most likely due to the test data having fetal heart rate states predicted from another model. The ~89% recall obtained after tuning the second random forest classifier in the classifier chain is very close to the recall of the classifier chain with the first random forest classifier tuned and the second random forest classifier untuned. This leads me to believe that the second random forest classifier is already near optimal and the only way to improve it further would be to improve the first classifier, thus reducing false labels being used to predict the next label.
# Display the final result tables together for comparison.
print('Model Selection')
display(model_select)

print('\n' * 2)
print('Chain Classifier Cross Validation')
print('First Classifier tuned, Second Classifier Validation')
display(smote_chain_cv)

print('\n' * 2)
print('Chain Classifier Cross Validation')
print('')
display(final_df)
After tuning both classifiers in the classification chain it appears that the random forest classifier has peaked at around 90% recall. Without finding a way to more accurately predict CLASS, I believe that this is about as accurate as I can get the chain classifier. However, when the chain classifier is cross-validated on the data containing the actual CLASS values, Logistic Regression and Random Forest classifiers both had recall of ~97%. This leads me to believe that if a model that more accurately predicted CLASS were generated, a chain classifier with recall approaching 97% could be built.